# 02_forecast_regional_sales.py
import warnings
from pathlib import Path

import pandas as pd
from prophet import Prophet
warnings.filterwarnings('ignore')
# -------------------------------
# LOAD MONTHLY REGIONAL DATA
# -------------------------------
# Read the processed monthly sales table and convert the "Month" column to
# datetimes so the min/max below (and later date comparisons) operate on
# timestamps rather than strings.
df = pd.read_csv(r"data/Processed/monthly_regional_sales.csv")
df = df.assign(Month=pd.to_datetime(df["Month"]))

# Quick summary of what was loaded.
print("Loaded monthly regional sales")
print("Date range: {} to {}".format(df["Month"].min(), df["Month"].max()))
print("Regions: {}".format(df["Region"].unique()))
# -------------------------------
# FORECAST EACH REGION SEPARATELY
# -------------------------------
# One independent Prophet model is fitted per region. For every region the
# loop appends two frames with identical columns (ds, Region, Sales, Type):
# the actual rows to `all_historical` and the predicted rows to
# `all_forecasts`.
regions = df["Region"].unique()
forecast_periods = 6  # Forecast 6 months ahead
all_forecasts = []
all_historical = []
for region in regions:
    print(f"\n--- Processing {region} ---")

    # Prophet requires the date column to be named 'ds' and the target 'y'.
    # Sorting here keeps the actuals in chronological order regardless of the
    # row order in the input file.
    prophet_data = (
        df.loc[df["Region"] == region, ["Month", "Sales"]]
        .rename(columns={"Month": "ds", "Sales": "y"})
        .sort_values("ds", kind="stable")
        .reset_index(drop=True)
    )

    # Train Prophet model for this region
    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        seasonality_mode="multiplicative",  # Better for sales data
    )
    model.fit(prophet_data)

    # Step future dates with the same month anchor as the history. A fixed
    # month-end frequency would, for month-start data, place the first
    # "forecast" in the same calendar month as the last actual. Offset objects
    # are used instead of alias strings because the "M" alias is deprecated in
    # recent pandas releases.
    history_is_month_end = prophet_data["ds"].dt.is_month_end.all()
    month_step = (
        pd.offsets.MonthEnd() if history_is_month_end else pd.offsets.MonthBegin()
    )
    future = model.make_future_dataframe(periods=forecast_periods, freq=month_step)

    # Generate predictions (covers the history dates plus the future dates)
    forecast = model.predict(future)

    last_historical_date = prophet_data["ds"].max()

    # Historical data (actuals): taken directly from the input rows so each
    # date keeps its own sales value. Pairing actuals with the forecast rows by
    # position is unsafe because Prophet de-duplicates and sorts its history
    # dates.
    historical = prophet_data.rename(columns={"y": "Sales"})
    historical["Region"] = region
    historical["Type"] = "Actual"
    historical = historical[["ds", "Region", "Sales", "Type"]]

    # Future forecasts: only rows strictly after the last actual date, with
    # the point prediction (yhat) reported as Sales.
    is_future = forecast["ds"] > last_historical_date
    future_forecast = forecast.loc[is_future, ["ds", "yhat"]].rename(
        columns={"yhat": "Sales"}
    )
    future_forecast["Region"] = region
    future_forecast["Type"] = "Forecast"
    future_forecast = future_forecast[["ds", "Region", "Sales", "Type"]]

    # Store results
    all_historical.append(historical)
    all_forecasts.append(future_forecast)
    print(f"{region}: {len(historical)} historical records, {len(future_forecast)} forecast records")
# -------------------------------
# COMBINE ALL REGIONS
# -------------------------------
# Stack the per-region actuals and the per-region forecasts separately first,
# then join the two stacks into one long table.
combined_historical = pd.concat(all_historical, ignore_index=True)
combined_forecasts = pd.concat(all_forecasts, ignore_index=True)

# The date column is renamed from Prophet's "ds" to "Month" for Power BI, and
# rows are ordered by date, then region.
final_data = (
    pd.concat([combined_historical, combined_forecasts], ignore_index=True)
    .rename(columns={"ds": "Month"})
    .sort_values(["Month", "Region"])
    .reset_index(drop=True)
)

# Report how many rows of each type ended up in the final table.
print("\n--- FINAL DATASET ---")
print(f"Total records: {len(final_data)}")
print(f"Actual records: {int((final_data['Type'] == 'Actual').sum())}")
print(f"Forecast records: {int((final_data['Type'] == 'Forecast').sum())}")
# -------------------------------
# SAVE FOR POWER BI
# -------------------------------
# Write the combined actual + forecast table to CSV and echo the last rows.
output_path = r"data/Predictions/regional_sales_with_forecast.csv"
# to_csv does not create missing folders, so make sure the output directory
# exists before writing (a no-op when it is already there).
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
final_data.to_csv(output_path, index=False)
print(f"\nForecast saved to: {output_path}")
print("\nSample of final data:")
print(final_data.tail(15))